import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import seaborn as sns
import numpy as np
plt.style.use('fivethirtyeight')
import networkx as nx
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pylab as plt
from itertools import count
from operator import itemgetter
from networkx.drawing.nx_agraph import graphviz_layout
import pylab
# --- Member network: load the raw tables and build the graphs ---------------
member_edges = pd.read_csv('member-edges.csv')
member_meta = pd.read_csv('meta-members.csv')

# Notebook-style inspection expressions; they display nothing in a script.
len(member_meta)
member_edges.head()
member_meta.head()

# Weighted graph over the complete member edge list.
no_of_nodes_edges = nx.from_pandas_edgelist(member_edges, 'member1', 'member2', 'weight')

# Connectivity has to be tested on the graph built from the edge list.
# nx.path_graph(member_edges) would only chain the DataFrame's column labels
# into a path, which is connected regardless of the data.
G = no_of_nodes_edges
print(nx.is_connected(G))
print('Number of nodes is:', len(no_of_nodes_edges.nodes))
print('Number of edges is:', len(no_of_nodes_edges.edges))

# Work on the first 8000 edge rows only for the rest of the member analysis.
member_edges_subset = member_edges[0:8000]
G_nodes = nx.from_pandas_edgelist(member_edges_subset, 'member1', 'member2', 'weight')
print('Number of nodes is:', len(G_nodes.nodes))
print('Number of edges is:', len(G_nodes.edges))
# --- Overview drawing of the member sub-network -----------------------------
# Use the fully qualified option name: the bare pattern 'precision' is not a
# unique option key on current pandas releases and raises an OptionError.
pd.set_option('display.precision', 10)

# Unweighted graph over the 8000-row edge subset.
G = nx.from_pandas_edgelist(member_edges_subset, 'member1', 'member2', create_using=nx.Graph())
nodes = G.nodes()
degree = G.degree()

# Per-node degree and its range; computed here but not passed to nx.draw
# below, which paints every node white.
colors = [degree[n] for n in nodes]
pos = nx.kamada_kawai_layout(G)
cmap = plt.cm.spring
vmin = min(colors)
vmax = max(colors)

fig = plt.figure(figsize=(10, 8), dpi=70)
nx.draw(G, pos, alpha=0.5, nodelist=nodes, node_color='w', node_size=10,
        with_labels=False, font_size=2, width=0.5, cmap=cmap,
        edge_color='#f5ea16')
fig.set_facecolor('#050500')
plt.show()
# --- Per-member centrality table --------------------------------------------
# Record each node's degree as a node attribute on the graph.
for node in sorted(G_nodes.nodes()):
    G_nodes.nodes[node]['degree'] = G_nodes.degree(node)

# One row per node, built from the node attribute dicts, highest degree first.
node_records = list(G_nodes.nodes(data=True))
node_degree = pd.DataFrame(
    [attrs for _, attrs in node_records],
    index=[label for label, _ in node_records],
)
node_degree = (
    node_degree
    .sort_values(by='degree', ascending=False)
    .rename_axis('Actor')
    .reset_index()
)
node_degree.head()

# Betweenness centrality, joined onto the degree table by node label.
df_centrality_bet = nx.betweenness_centrality(G_nodes)
df_centrality_between = (
    pd.DataFrame.from_dict(df_centrality_bet, orient='index')
    .set_axis(['betweenness_centrality'], axis='columns')
    .rename_axis('Actor')
    .reset_index()
)
centrality_data = node_degree.merge(df_centrality_between, on=['Actor'])

# Closeness centrality.
centrality_closeness = nx.closeness_centrality(G_nodes)
df_centrality_closeness = (
    pd.DataFrame.from_dict(centrality_closeness, orient='index')
    .set_axis(['closeness_centrality'], axis='columns')
    .rename_axis('Actor')
    .reset_index()
)
centrality_data = centrality_data.merge(df_centrality_closeness, on=['Actor'])

# Eigenvector centrality (numpy-based solver).
centrality_eigen = nx.eigenvector_centrality_numpy(G_nodes)
df_centrality_eigen = (
    pd.DataFrame.from_dict(centrality_eigen, orient='index')
    .set_axis(['eigenvector_centrality'], axis='columns')
    .rename_axis('Actor')
    .reset_index()
)
centrality_data = centrality_data.merge(df_centrality_eigen, on=['Actor'])

# Degree centrality.
degree_centrality = nx.degree_centrality(G_nodes)
df_centrality_degree = (
    pd.DataFrame.from_dict(degree_centrality, orient='index')
    .set_axis(['degree_centrality'], axis='columns')
    .rename_axis('Actor')
    .reset_index()
)
centrality_data = centrality_data.merge(df_centrality_degree, on=['Actor'])
centrality_data.head()
# --- Top members per centrality measure -------------------------------------
# For every measure: a top-5 table expression (only displayed in a notebook)
# followed by the ten highest-scoring node labels, which are printed.

print('The most centered people in the network based on degree_centrality are:')
centrality_data[['Actor', 'degree_centrality']].sort_values(
    by='degree_centrality', ascending=False).head(5)
degree_ranked = sorted(degree_centrality.items(), key=itemgetter(1), reverse=True)
degree_top_list = [node for node, _ in degree_ranked[:10]]
print(degree_top_list)

print('The most centered people in the network based on betweenness_centrality are:')
centrality_data[['Actor', 'betweenness_centrality']].sort_values(
    by='betweenness_centrality', ascending=False).head(5)
bet_ranked = sorted(df_centrality_bet.items(), key=itemgetter(1), reverse=True)
bet_top_list = [node for node, _ in bet_ranked[:10]]
print(bet_top_list)

print('The most centered people in the network based on closeness_centrality are:')
centrality_data[['Actor', 'closeness_centrality']].sort_values(
    by='closeness_centrality', ascending=False).head(5)
close_ranked = sorted(centrality_closeness.items(), key=itemgetter(1), reverse=True)
close_top_list = [node for node, _ in close_ranked[:10]]
print(close_top_list)

print('The most centered people in the network based on eigenvector_centrality are:')
centrality_data[['Actor', 'eigenvector_centrality']].sort_values(
    by='eigenvector_centrality', ascending=False).head(5)
eigen_ranked = sorted(centrality_eigen.items(), key=itemgetter(1), reverse=True)
eigen_top_list = [node for node, _ in eigen_ranked[:10]]
print(eigen_top_list)
# --- Member network drawn four times, node size = one centrality each -------
# Every drawing gets its own figure and an explicit plt.show(); otherwise all
# four networks are painted onto the same implicit axes on top of each other.

# Node colour is 20000 * degree in all four figures, so compute it once.
node_color = [20000.0 * G_nodes.degree(v) for v in G_nodes]

# 1) Node size proportional to degree centrality (spring layout).
pos = nx.spring_layout(G_nodes)
node_size = [v * 10000 for v in degree_centrality.values()]
plt.style.use('classic')
plt.rcParams['figure.figsize'] = (20, 15)
plt.figure()
nx.draw_networkx(G_nodes, pos=pos, with_labels=False,
                 node_color=node_color,
                 node_size=node_size)
plt.axis('off')
plt.show()
sorted(degree_centrality, key=degree_centrality.get, reverse=True)[:5]

# 2) Node size proportional to betweenness centrality (Kamada-Kawai layout).
pos = nx.kamada_kawai_layout(G_nodes)
node_size = [v * 10000 for v in df_centrality_bet.values()]
plt.style.use('fivethirtyeight')
plt.rcParams['figure.figsize'] = (20, 15)
plt.figure()
nx.draw_networkx(G_nodes, pos=pos, with_labels=False,
                 node_color=node_color,
                 node_size=node_size)
plt.axis('off')
plt.show()
sorted(df_centrality_bet, key=df_centrality_bet.get, reverse=True)[:5]

# 3) Node size proportional to closeness centrality (smaller scale factor).
pos = nx.spring_layout(G_nodes)
node_size = [v * 1000 for v in centrality_closeness.values()]
plt.style.use('fivethirtyeight')
plt.rcParams['figure.figsize'] = (20, 15)
plt.figure()
nx.draw_networkx(G_nodes, pos=pos, with_labels=False,
                 node_color=node_color,
                 node_size=node_size)
plt.axis('off')
plt.show()
sorted(centrality_closeness, key=centrality_closeness.get, reverse=True)[:5]

# 4) Node size proportional to eigenvector centrality.
pos = nx.spring_layout(G_nodes)
node_size = [v * 10000 for v in centrality_eigen.values()]
plt.style.use('classic')
plt.rcParams['figure.figsize'] = (20, 15)
plt.figure()
nx.draw_networkx(G_nodes, pos=pos, with_labels=False,
                 node_color=node_color,
                 node_size=node_size)
plt.axis('off')
plt.show()
sorted(centrality_eigen, key=centrality_eigen.get, reverse=True)[:5]
# --- Group network: load the raw tables and build the graph -----------------
group_edges = pd.read_csv('group-edges.csv')
group_meta = pd.read_csv('meta-groups.csv', index_col='group_id')

# Weighted graph over the group edge list.
group_data = nx.from_pandas_edgelist(group_edges, 'group1', 'group2', 'weight')

# Connectivity has to be tested on the graph built from the edge list.
# nx.path_graph(group_edges) would only chain the DataFrame's column labels
# into a path, which is connected regardless of the data.
G1 = group_data
print(nx.is_connected(G1))

# Notebook-style inspection expressions; they display nothing in a script.
group_edges.head()
group_meta.head()

print('Number of nodes is:', len(group_data.nodes))
print('Number of edges is:', len(group_data.edges))
print('Number of groups with group_mata data:', len(group_meta))

# Select the metadata rows for the graph's nodes, in node order.
# NOTE(review): .loc with a list raises KeyError on recent pandas when a node
# id has no metadata row -- confirm every node in the edge list is covered.
group_data_member = group_meta.loc[list(group_data.nodes)]
print('Number of groups matching with the group_edges:', len(group_data_member))
# --- Overview drawing of the group network ----------------------------------
# Use the fully qualified option name: the bare pattern 'precision' is not a
# unique option key on current pandas releases and raises an OptionError.
pd.set_option('display.precision', 10)

# Unweighted graph over the full group edge list.
G = nx.from_pandas_edgelist(group_edges, 'group1', 'group2', create_using=nx.Graph())
nodes = G.nodes()
degree = G.degree()

# Per-node degree and its range; computed here but not passed to nx.draw
# below, which paints every node white.
colors = [degree[n] for n in nodes]
pos = nx.kamada_kawai_layout(G)
cmap = plt.cm.spring
vmin = min(colors)
vmax = max(colors)

fig = plt.figure(figsize=(8, 8), dpi=70)
nx.draw(G, pos, alpha=0.5, nodelist=nodes, node_color='w', node_size=10,
        with_labels=False, font_size=2, width=0.5, cmap=cmap,
        edge_color='#16f7e8')
fig.set_facecolor('#050500')
plt.show()
# --- Attach graph metrics to the per-group metadata -------------------------
# Each metric is a node -> value mapping; wrapping it in a Series lets pandas
# align the values on the group-id index of group_data_member.
group_metrics = {
    'degree': dict(nx.degree(group_data)),
    'degree_centrality': dict(nx.degree_centrality(group_data)),
    'clustering': nx.clustering(group_data),
    'betweenness_centrality': nx.betweenness_centrality(group_data, weight='weight'),
}
for metric_name, metric_values in group_metrics.items():
    group_data_member[metric_name] = pd.Series(metric_values)
group_data_member.head()

# Ten groups with the most members.
groups_by_size = group_data_member.sort_values(by='num_members', ascending=False)
most_member_groups = groups_by_size.loc[:, ['group_name', 'num_members']].head(10)
most_member_groups
# --- Group network drawn three times: degree, betweenness, clustering -------
# In each figure the node colour is 50000 * degree and the node size is the
# chosen metric from group_data_member multiplied by a fixed factor.

# 1) Degree centrality.
print('The ten most central groups in the network based on Degree centrality are:')
groups_by_degree = group_data_member.sort_values(by='degree_centrality', ascending=False)
most_degree_groups = groups_by_degree['group_name'].head(10)
most_degree_groups
pos = nx.spring_layout(group_data)
node_color = [50000.0 * deg for _, deg in group_data.degree()]
node_size = (group_data_member['degree_centrality'] * 20000).tolist()
plt.style.use('fivethirtyeight')
plt.rcParams['figure.figsize'] = (15, 8)
nx.draw_networkx(
    group_data,
    pos=pos,
    with_labels=False,
    node_color=node_color,
    node_size=node_size,
    alpha=0.9,
)
plt.axis('off')
sorted(group_data_member['degree_centrality'], reverse=True)[:5]
ax = plt.gca()
ax.set_aspect(1)
ax.axis('off')
ax.set_title('Network of Groups with Degree Centrality ')
plt.show()

# 2) Betweenness centrality.
print('The ten most central groups in the network based on centrality are:')
groups_by_betweenness = group_data_member.sort_values(by='betweenness_centrality', ascending=False)
most_cental_groups = groups_by_betweenness['group_name'].head(10)
most_cental_groups
pos = nx.spring_layout(group_data)
node_color = [50000.0 * deg for _, deg in group_data.degree()]
node_size = (group_data_member['betweenness_centrality'] * 20000).tolist()
plt.style.use('fivethirtyeight')
plt.rcParams['figure.figsize'] = (15, 8)
nx.draw_networkx(
    group_data,
    pos=pos,
    with_labels=False,
    node_color=node_color,
    node_size=node_size,
    alpha=0.9,
)
plt.axis('off')
sorted(group_data_member['betweenness_centrality'], reverse=True)[:5]
ax = plt.gca()
ax.set_aspect(1)
ax.axis('off')
ax.set_title('Network of Groups with Centrality ')
plt.show()

# 3) Clustering coefficient (Kamada-Kawai layout, smaller size factor).
print('The ten most clustered groups in the network based on clustering are:')
groups_by_clustering = group_data_member.sort_values(by='clustering', ascending=False)
most_clustered_groups = groups_by_clustering['group_name'].head(10)
most_clustered_groups
pos = nx.kamada_kawai_layout(group_data)
node_color = [50000.0 * deg for _, deg in group_data.degree()]
node_size = (group_data_member['clustering'] * 100).tolist()
plt.style.use('fivethirtyeight')
plt.rcParams['figure.figsize'] = (15, 8)
nx.draw_networkx(
    group_data,
    pos=pos,
    with_labels=False,
    node_color=node_color,
    node_size=node_size,
    alpha=0.9,
)
plt.axis('off')
sorted(group_data_member['clustering'], reverse=True)[:5]
ax = plt.gca()
ax.set_aspect(1)
ax.axis('off')
ax.set_title('Network of Groups with clustering ')
plt.show()
# --- Restrict the group network to the 'Tech' category ----------------------
# The category list must be assigned before it is inspected; reading it first
# raises NameError.
groups_unique_names = group_meta['category_name'].unique()
len(groups_unique_names)
print(groups_unique_names)
print(' ')

large_group = group_meta[group_meta['category_name'] == 'Tech']
print('largest group in the network of group_data is:', len(large_group))
Tech_group = group_meta.loc[group_meta.category_name == 'Tech']
print('Number of tech groups with metadata.', Tech_group.shape[0])

# Sub-network induced by the Tech group ids.
Group_tech = group_data.subgraph(Tech_group.index)

# Keep the first connected component that networkx yields.
# NOTE(review): this is not necessarily the largest component -- confirm
# whether max(nx.connected_components(Group_tech), key=len) was intended.
tech_components = list(nx.connected_components(Group_tech))
G_tech = Group_tech.subgraph(tech_components[0])

# Metadata rows for exactly the nodes of that component, in node order.
tech_group_cat = Tech_group.loc[list(G_tech.nodes)]
print('Total number of Tech groups after matching the dataset with meta data:', len(tech_group_cat))
tech_group_cat.head()
tech_group_cat.sort_values(by='num_members', ascending=False)[['group_name', 'num_members']][:10]

# Graph metrics, aligned on the group-id index.
tech_group_cat['degree'] = pd.Series(dict(nx.degree(G_tech)))
tech_group_cat['clustering'] = pd.Series(nx.clustering(G_tech))
tech_group_cat['centrality'] = pd.Series(nx.betweenness_centrality(G_tech))
tech_group_cat.sort_values(by='centrality', ascending=False)[['group_name', 'centrality']][:10]
# --- Tech sub-network drawn twice on a dark background ----------------------

# 1) Node size equals the node's degree.
pos = nx.kamada_kawai_layout(G_tech)
plt.style.use('fivethirtyeight')
fig = plt.figure(figsize=(10, 8), dpi=70)
node_size = tech_group_cat['degree'].tolist()
cmap = plt.cm.spring
nx.draw_networkx(
    G_tech,
    pos,
    with_labels=False,
    node_size=node_size,
    width=0.5,
    cmap=cmap,
    edge_color='#16f7e8',
    node_color='r',
    alpha=0.5,
)
fig.set_facecolor('#050500')
ax = plt.gca()
ax.set_aspect(1)
ax.axis('off')
ax.set_title('Nashville Tech MeetUps')
plt.show()

# 2) Node size equals 20000 * betweenness centrality.
pos = nx.kamada_kawai_layout(G_tech)
plt.style.use('fivethirtyeight')
fig = plt.figure(figsize=(15, 10), dpi=70)
node_size = (tech_group_cat['centrality'] * 20000).tolist()
cmap = plt.cm.spring
nx.draw_networkx(
    G_tech,
    pos,
    with_labels=False,
    node_size=node_size,
    width=0.5,
    cmap=cmap,
    edge_color='yellow',
    node_color='r',
    alpha=0.5,
)
fig.set_facecolor('#050500')
ax = plt.gca()
ax.set_aspect(1)
ax.axis('off')
ax.set_title('Nashville Tech MeetUps')
plt.show()
# Below is the source code for community detection using NetworkX.
def community_layout(G_tech, partition):
    """Compute node positions for a graph that is split into communities.

    Every node is placed at the position of its community plus its own
    offset inside that community.

    Arguments:
    ----------
    G_tech -- graph to lay out; its nodes must all be keys of ``partition``
    partition -- dict mapping node -> community label

    Returns:
    --------
    pos -- dict mapping node -> position (community position + node offset)
    """
    # Community positions use scale 3, within-community offsets scale 1.
    community_anchor = _position_communities(G_tech, partition, scale=3.)
    node_offset = _position_nodes(G_tech, partition, scale=1.)

    return {
        node: community_anchor[node] + node_offset[node]
        for node in G_tech.nodes()
    }
def _position_communities(G_tech, partition, **kwargs):
    """Return, for every node in ``partition``, the position of its community.

    A directed graph with one node per community is built; an edge between
    two communities is weighted by the number of graph edges found between
    them. That graph is laid out with ``nx.spring_layout`` (``kwargs`` are
    forwarded to it) and each node receives its community's position.
    """
    inter_community = _find_between_community_edges(G_tech, partition)

    community_graph = nx.DiGraph()
    community_graph.add_nodes_from(set(partition.values()))
    for (source, target), linking_edges in inter_community.items():
        community_graph.add_edge(source, target, weight=len(linking_edges))

    community_pos = nx.spring_layout(community_graph, **kwargs)

    return {node: community_pos[label] for node, label in partition.items()}
def _find_between_community_edges(G_tech, partition):
    """Group the edges that connect two different communities.

    Arguments:
    ----------
    G_tech -- graph whose ``edges()`` yields ``(node_i, node_j)`` pairs
    partition -- dict mapping node -> community label

    Returns:
    --------
    edges -- plain dict mapping ``(community_i, community_j)`` to the list of
             edges ``(node_i, node_j)`` running from the first community to
             the second. The key is ordered like the edge, so ``(a, b)`` and
             ``(b, a)`` are separate entries. Edges inside a single community
             are left out.

    Raises:
    -------
    KeyError -- if an edge endpoint is missing from ``partition``
    """
    edges = {}
    for ni, nj in G_tech.edges():
        ci = partition[ni]
        cj = partition[nj]
        if ci != cj:
            # setdefault creates the bucket on first use, so no try/except
            # around the lookup is needed.
            edges.setdefault((ci, cj), []).append((ni, nj))
    return edges
def _position_nodes(G_tech, partition, **kwargs):
    """Position nodes within their communities.

    Arguments:
    ----------
    G_tech -- graph to lay out
    partition -- dict mapping node -> community label
    kwargs -- forwarded unchanged to ``nx.spring_layout``

    Returns:
    --------
    pos -- dict mapping node -> position, where every community's subgraph
           has been laid out on its own with ``nx.spring_layout``
    """
    # Collect the member nodes of every community.
    communities = {}
    for node, community in partition.items():
        communities.setdefault(community, []).append(node)

    # Lay out each community independently and merge the results.
    pos = {}
    for nodes in communities.values():
        subgraph = G_tech.subgraph(nodes)
        pos.update(nx.spring_layout(subgraph, **kwargs))
    return pos
# --- Louvain community detection on the Tech sub-network --------------------
import community

partition = community.community_louvain.best_partition(G_tech)
tech_group_cat['community'] = pd.Series(partition)
print("Modularity:", community.modularity(partition, G_tech))

plt.figure(dpi=150)
pos = community_layout(G_tech, partition)
plt.style.use('fivethirtyeight')
plt.rcParams['figure.figsize'] = (15, 10)

# Request as many colours as the highest community id needs. The palette
# cycles when more colours are requested than it defines, so indexing by
# community id can no longer run past the end of the palette.
community_ids = set(partition.values())
palette = sns.color_palette(n_colors=max(community_ids, default=-1) + 1)
cdict = {ii: palette[ii] for ii in community_ids}

# Colours are listed in the graph's node order, which is the order the
# drawing function pairs them with nodes; partition.values() follows the
# partition dict's own order instead.
nx.draw_networkx(G_tech, pos, node_size=60,
                 node_color=[cdict[partition[n]] for n in G_tech.nodes()],
                 with_labels=True, width=0.15,
                 cmap='rainbow')
plt.axis('off')
plt.show()

# Up to five most central groups of every community. The ranking does not
# depend on the community, so it is computed once.
ranked_tech_groups = tech_group_cat.sort_values(by='centrality', ascending=False)
for i in tech_group_cat.community.unique():
    print('Most central groups in Community {}...'.format(i))
    tech_df = ranked_tech_groups.loc[ranked_tech_groups.community == i]
    for k, g in tech_df.head().iterrows():
        print('\t{}'.format(g.group_name))
# --- Louvain communities on Zachary's karate club graph ---------------------
import community

G_tech = nx.karate_club_graph()
partition = community.community_louvain.best_partition(G_tech)
# The karate-club partition is deliberately not written into tech_group_cat:
# its keys are karate-club node labels, not the group ids that index that
# table.

pos = nx.spring_layout(G_tech)

# A single figure carrying both the size and the dpi setting, so that no
# empty extra figure is opened.
plt.figure(figsize=(8, 8), dpi=150)
plt.axis('off')

# Colour values are listed in the same order as the nodes that are drawn.
node_order = list(G_tech.nodes())
nx.draw_networkx_nodes(G_tech, pos, nodelist=node_order, node_size=600,
                       node_color=[partition[n] for n in node_order])
# draw_networkx_nodes has no with_labels option; labels are drawn separately.
nx.draw_networkx_labels(G_tech, pos)
nx.draw_networkx_edges(G_tech, pos, alpha=0.3)
plt.show()